با استفاده از داده های زلزله ها در ایران و جهان به سوالات زیر پاسخ دهید.
loading data and required packages…
library(readr)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(highcharter)## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(plotly)## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(stringr)
hist_data <- read_rds('../../data/historical_web_data_26112015.rds')
disaster <- read_delim("../../data/disaster.txt", "\t", escape_double = FALSE, trim_ws = TRUE)## Parsed with column specification:
## cols(
## .default = col_integer(),
## FLAG_TSUNAMI = col_character(),
## SECOND = col_character(),
## EQ_PRIMARY = col_double(),
## EQ_MAG_MW = col_double(),
## EQ_MAG_MS = col_double(),
## EQ_MAG_MB = col_character(),
## EQ_MAG_ML = col_double(),
## EQ_MAG_MFA = col_character(),
## EQ_MAG_UNK = col_double(),
## COUNTRY = col_character(),
## STATE = col_character(),
## LOCATION_NAME = col_character(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## MISSING = col_character(),
## DAMAGE_MILLIONS_DOLLARS = col_character(),
## TOTAL_MISSING = col_character(),
## TOTAL_MISSING_DESCRIPTION = col_character(),
## TOTAL_DAMAGE_MILLIONS_DOLLARS = col_character()
## )
## See spec(...) for full column specifications.
iran_eqrthquake<- read_rds('../../data/iran_earthquake.rds')
myMap = read_rds("../../data/Tehrn_map_6.rds")
worldwide <- read_csv('../../data/worldwide.csv')## Parsed with column specification:
## cols(
## .default = col_double(),
## time = col_datetime(format = ""),
## magType = col_character(),
## nst = col_integer(),
## net = col_character(),
## id = col_character(),
## updated = col_datetime(format = ""),
## place = col_character(),
## type = col_character(),
## magNst = col_integer(),
## status = col_character(),
## locationSource = col_character(),
## magSource = col_character()
## )
## See spec(...) for full column specifications.
۱. با استفاده از داده های historical_web_data_26112015.rds و استفاده از نمودار پراکنش سه بعدی بسته plotly نمودار طول، عرض و عمق زلزله ها را رسم نمایید. علاوه بر آن بزرگی هر نقطه را برابر بزرگی زمین لرزه قرار دهید.
################################################################################################
## Q1
hist_data %>% plot_ly(type = 'scatter3d', mode='markers', x = ~Latitude, y = ~Longitude, z = ~-Depth, size = ~Magnitude,
marker = list(symbol = 'circle', sizemode = 'diameter'), sizes = c(1,20),
text = ~paste('Time:', Time, '<br>Province:', Province, '<br>City:', City,'<br>Magnitude:', Magnitude)) %>%
layout(title = 'Iran Earthquakes Data Visualization',
scene = list(xaxis = list(title = 'Lattitude'),
yaxis = list(title = 'Longitude'),
zaxis = list(title = 'Depth')
),
paper_bgcolor = 'rgb(243, 243, 243)',
plot_bgcolor = 'rgb(243, 243, 243)'
)۲. پویانمایی سونامی های تاریخی را بر حسب شدت بر روی نقشه زمین رسم نمایید.(از داده زلزله های بزرگ استفاده نمایید.)
################################################################################################
## Q2
disaster %>% filter(FLAG_TSUNAMI == 'Tsu') %>%
select(year = YEAR, mag = EQ_PRIMARY, lat = LATITUDE, lng = LONGITUDE) -> q2_data
min_year <- min(q2_data$year, na.rm = TRUE)
max_year <- max(q2_data$year, na.rm = TRUE)
q2_data_acc <- q2_data %>% filter(year == min_year)
for (year_iter in (min_year+1):max_year) {
q2_data_acc <- rbind(
q2_data_acc,
q2_data_acc %>% filter(year == year_iter-1) %>% mutate(year = year_iter),
q2_data %>% filter(year == year_iter)
)
}
map_data("world") %>%
plot_geo(locationmode = 'USA-states') %>%
add_markers(size = ~mag, x = ~lng, y = ~lat, data = q2_data, frame = ~year) %>%
animation_opts(
frame = 100,
transition = 0,
redraw = FALSE
) %>%
animation_slider(
hide = T
) %>%
animation_button(
x = 1, xanchor = "right", y = 0, yanchor = "bottom"
)## Warning: Ignoring 35 observations
۳. نمودار چگالی دو بعدی زلزله های تاریخی ایران را رسم کنید.( از داده iran_earthquake.rds و لایه stat_density_2d استفاده نمایید).
################################################################################################
## Q3
library(ggmap)## Google Maps API Terms of Service: http://developers.google.com/maps/terms.
## Please cite ggmap if you use it: see citation("ggmap") for details.
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
##
## wind
iran_eqrthquake$Latitude = as.numeric(iran_eqrthquake$Lat)
iran_eqrthquake$Longitude = as.numeric(iran_eqrthquake$Long)
ggmap(myMap) + geom_point(aes(x = Long, y = Lat), data = iran_eqrthquake, alpha = 0.5, size = 0.2) +
stat_density_2d(aes(x = Long, y = Lat), data = iran_eqrthquake)## Warning: Removed 243 rows containing non-finite values (stat_density2d).
## Warning: Removed 243 rows containing missing values (geom_point).
۴. احتمال اینکه در ایران در پنج سال آینده زلزله به بزرگی هفت ریشتر رخ دهد را محاسبه کنید. (از احتمال شرطی استفاده کنید.)
################################################################################################
## Q4
iran_eqrthquake %>% select(OriginTime, Mag) -> q4_data
q4_data %>% mutate(year = format(OriginTime,'%Y'), month = format(OriginTime,'%m')) %>% filter(year > 2000) %>%
mutate(month_count = (year %>% as.integer - 2006) * 12 + month %>% as.integer) %>% select(-year, -month) %>%
select(-OriginTime) %>% mutate(disaster = Mag >= 7) %>% group_by(month_count) %>% summarise(disaster = any(disaster)) %>%
ungroup %>% group_by(disaster) %>% summarise(frequency = n()) -> q4_disaster_prob
false <- q4_disaster_prob %>% filter(disaster == FALSE) %>% .$frequency
true <- q4_disaster_prob %>% filter(disaster == TRUE) %>% .$frequency
prob <- true / (true + false)
# next 5 years is 60 months we calculate probablility that no earthquake with Mag > 7 would happen in next 60 months.
final_prob_not <- (1 - prob) ^ 60
# the the probabiliry that in upcomming 60 months there is at least one earthquake with Mag > 7 is:
final_prob <- 1 - final_prob_not
final_prob %>% print## [1] 0.8714557
# of course this value has no significat meaning asd is this large because i've selected my baskets as months,
# selecting daily baskets would result in a much smaller number
# at last the calculated probability would pretty much dedpend on our definition of the question.۵. بر اساس داده های زلزله های بزرگ ابتدا تعداد و متوسط کشته زلزله ها را بر حسب کشور استخراج نمایید. سپس نمودار گرمایی تعداد کشته ها را بر روی کره زمین رسم نمایید.(مانند مثال زیر!)
################################################################################################
## Q5
disaster %>% select(country = COUNTRY, deaths = DEATHS) %>% group_by(country) %>%
summarise(tot_deaths = sum(deaths, na.rm = TRUE), mean_deaths = mean(deaths, na.rm = TRUE)) -> q5_data
library(rworldmap)## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
matched <- joinCountryData2Map(q5_data %>% select(country, value = tot_deaths), joinCode="NAME", nameJoinColumn="country")## 138 codes from your data successfully matched countries in the map
## 16 codes from your data failed to match with a country code in the map
## 105 codes from the map weren't represented in your data
mapCountryData(matched, nameColumnToPlot="value", mapTitle="Total deaths in earthquakes", catMethod = "quantiles", colourPalette = "heat")## You asked for 7 quantiles, only 5 could be created in quantiles classification
۶. با استفاده از داده لرزه های بزرگ و به وسیله طول، عرض، شدت، عمق مدلی برای پیش بینی تعداد کشته های زلزله بیابید.
################################################################################################
## Q6
disaster %>%
select(lat = LATITUDE, lng = LONGITUDE, mag = EQ_PRIMARY, depth = FOCAL_DEPTH, deaths = TOTAL_DEATHS) %>%
.[complete.cases(.),] -> q6_data
library(h2o)##
## ----------------------------------------------------------------------
##
## Your next step is to start H2O:
## > h2o.init()
##
## For H2O package documentation, ask for help:
## > ??h2o
##
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
##
## ----------------------------------------------------------------------
##
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
##
## cor, sd, var
## The following objects are masked from 'package:base':
##
## &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
## colnames<-, ifelse, is.character, is.factor, is.numeric, log,
## log10, log1p, log2, round, signif, trunc
glm(deaths~., data = q6_data, family = gaussian(link = 'identity')) %>% summary.glm()##
## Call:
## glm(formula = deaths ~ ., family = gaussian(link = "identity"),
## data = q6_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -14162 -4354 -2228 19 311179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.837e+04 3.599e+03 -5.105 3.93e-07 ***
## lat 6.956e+01 2.593e+01 2.682 0.00743 **
## lng -2.355e-01 6.757e+00 -0.035 0.97221
## mag 3.172e+03 5.370e+02 5.907 4.72e-09 ***
## depth -2.379e+01 1.384e+01 -1.719 0.08596 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 316173564)
##
## Null deviance: 3.4313e+11 on 1051 degrees of freedom
## Residual deviance: 3.3103e+11 on 1047 degrees of freedom
## AIC: 23582
##
## Number of Fisher Scoring iterations: 2
۷. با استفاده از داده worldwide.csv به چند سوال زیر پاسخ دهید. تحقیق کنید آیا می توان از پیش لرزه، زلزله اصلی را پیش بینی کرد؟
################################################################################################
## Q7
find_place <- function(string) {
tmp <- gregexpr(',', string)
start_index <- tmp[[length(string)]][1] + 2
result <- substr(string, start_index, nchar(string))
return(result)
}
q7_data <- worldwide
q7_data$place <- q7_data$place %>% sapply(find_place) %>% str_replace(., regex('region$', ignore_case = TRUE), '') %>%
str_replace(., regex('of the [^\\s]* coast of', ignore_case = TRUE), '') %>%
str_replace(., regex('[^\\s]* of (the)?', ignore_case = TRUE), '') %>%
str_replace(., regex('.* of', ignore_case = TRUE), '') %>%
str_replace(., regex('^off the ', ignore_case = TRUE), '') %>%
str_replace(., regex('islands?$', ignore_case = TRUE), '') %>%
str_trim(., side = 'both') %>% tolower(.)
q7_data %>% select(place, time, mag) %>% mutate(time = time %>% as.integer) -> q7_data
q7_data %>% arrange(place, time) -> q7_data
q7_data$time_diff <- q7_data$time - (q7_data$time %>% lag)
# according to wikipedia there is no currently known model for matching pre earthquakes to main earthquakes
# for example its proposed that earthquake of indonesia in 2002 was a pre earthquake for earthquake and tsunami
# in indian oscean in 2004!
# for the sake of this question, we consider a chain of earth rattles as a pre-main-post earthquake chain if
# time difference between no pair of them is more than 5 days.
q7_data %>% mutate(new_chain = time_diff > 5*24*3600) -> q7_data
q7_data$new_chain[1] <- FALSE
q7_data %>% mutate(chain_number = cumsum(new_chain)) %>% select(-new_chain, -time_diff) -> q7_ready_data
q7_ready_data %>% group_by(chain_number) %>% summarise(max_rank = which.max(mag), chain_lenght = n()) %>%
mutate(max_rel_pos = max_rank/chain_lenght) -> q7_ready_data_summarised
hchart(density(q7_ready_data_summarised$max_rel_pos), type = 'area', color = "#B71C1C", name='Distribution of main earthquake position in chain')hchart(density(q7_ready_data_summarised$chain_lenght), type = 'area', color = "#3BD641", name='Distribution of chain length')hchart(density(q7_ready_data_summarised %>% filter(chain_lenght < 620) %>% .$chain_lenght), type = 'area', color = "#3BD641", name='Distribution of chain length')hchart(density(q7_ready_data_summarised %>% filter(chain_lenght < 20) %>% .$chain_lenght), type = 'area', color = "#3BD641", name='Distribution of chain length')# as we can see with the really strong assumption that the consecutive durations of a chain are not longer than
# 5 days, we can see that for most of the chains, the biggest quake is the last one,
# but there is no clear understanding of how long a chain would actually be!
# perhaps with more information and a complex learnging model we can overcome this and predict chain length but
# the strong assumption we made in the first place makes it unachievable to predict the main quake from pre ones.۸. گزاره " آیا شدت زلزله به عمق آن بستگی دارد" را تحقیق کنید؟ (طبیعتا از آزمون فرض باید استفاده کنید.)
both correlation test and independence test show that this two are correlated and dependent.
################################################################################################
## Q8
worldwide %>% select(depth, mag) %>% .[complete.cases(.),] %>% filter(depth >= 0) -> q8_data
chisq.test(q8_data)## Warning in chisq.test(q8_data): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: q8_data
## X-squared = 559670, df = 60431, p-value < 2.2e-16
cor.test(q8_data$depth, q8_data$mag, method = 'spearm')## Warning in cor.test.default(q8_data$depth, q8_data$mag, method = "spearm"):
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: q8_data$depth and q8_data$mag
## S = 2.835e+13, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2292678
۹. میانگین سالانه زلزله ها را بر حسب کشور به دست آورید. آیا میتوان دلیلی در تایید یا رد تئوری هارپ ارائه کرد.
################################################################################################
## Q9
q9_data <- worldwide
q9_data$place <- q9_data$place %>% sapply(find_place) %>% str_replace(., regex('region$', ignore_case = TRUE), '') %>%
str_replace(., regex('of the [^\\s]* coast of', ignore_case = TRUE), '') %>%
str_replace(., regex('[^\\s]* of (the)?', ignore_case = TRUE), '') %>%
str_replace(., regex('.* of', ignore_case = TRUE), '') %>%
str_replace(., regex('^off the ', ignore_case = TRUE), '') %>%
str_replace(., regex('islands?$', ignore_case = TRUE), '') %>%
str_trim(., side = 'both') %>% tolower(.)
q9_data %>% mutate(year = format(time, '%Y') %>% as.integer) -> q9_data
q9_data %>% select(year, place, mag) %>% group_by(place) %>%
summarise(count = n(), mean_mag = mean(mag, na.rm = TRUE)) %>% arrange(desc(count)) %>% ungroup %>%
slice(1:5) -> q9_plot_data
hchart(q9_plot_data, type = 'column', hcaes(x = place, y = count, color = mean_mag))q9_data %>% select(year, place, mag) %>%
filter(place %in% c('alaska', 'indonesia', 'oklahama', 'japan', 'puerto rico')) %>% group_by(place, year) %>%
summarise(count = n(), mean_mag = mean(mag, na.rm = TRUE)) %>% arrange(desc(count)) %>%
hchart(type = 'column', hcaes(x = place, y = count, group = year))# harp project base is located in alaska! :D but alaska has always many and some had big earthquakes.
# one cannot simply make a conclusion out of this.۱۰. سه حقیقت جالب در مورد زلزله بیابید.
# the pocture of earth crust pieces can be generated useing a simple earthquake visualizasion
bbox <- c(left = -170, bottom = -60, right = 170, top = 80)
# World_map <- get_stamenmap(bbox, zoom = 3, maptype="terrain")
# save(World_map, file='map_data/all_world_map.RData')
load(file = '../../map_data/all_world_map.RData')
World_map %>% ggmap(extent = "device") + geom_point(data = worldwide, aes(y = latitude, x = longitude), alpha = 0.5, color = 'red', size = 0.1)## Warning: Removed 9312 rows containing missing values (geom_point).
# Brazil, Russia and Canada are earthquake safe countries
# look at the island of hawaii! :D
# as described in question 10 alaska has the most number of earthquakes in recent years. it has 3 of 6 biggest
# recorded earthquakes in history